# install.packages('knitr', repos = c('http://rforge.net', 'http://cran.rstudio.org'), type = 'source')
# install.packages("ggplot2")
# install.packages("gganimate")
# install.packages("gifski")
# install.packages("png")
# install.packages("transformr")
library("knitr")
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.4.4
library("gganimate")
## Warning: package 'gganimate' was built under R version 3.4.4
library("gifski")
## Warning: package 'gifski' was built under R version 3.4.4
library("png")
## Warning: package 'png' was built under R version 3.4.4
library("transformr")
## Warning: package 'transformr' was built under R version 3.4.4
f2D <- function(x, t) {
return (cos(x) * sin(t * pi) - (abs(x)/10) + x/20)
}
par(mfrow=c(3, 3))
plot(f2D(seq(-10, 10, 0.1), 0.00), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.17), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.33), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.50), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.67), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.83), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.00), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.17), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.33), type="l")
par(mfrow=c(1, 1))
constructData2D <- function (func, xMin, xMax, precision, timeMin, timeMax)
{
# Interval is bounded by gganimate - this just works, do not ask questions
timeInterval = ((timeMax - timeMin) / 50) + 0.001
data <- data.frame(X=0, Y=0, Time=0)
i = 1
for (t in seq(timeMin, timeMax, timeInterval))
{
for (x in seq(xMin, xMax, precision))
{
data[i,]$X = x
data[i,]$Y = func(x, t)
data[i,]$Time = t
i = i + 1
}
}
return (data)
}
timeData = constructData2D(f2D, -10, 10, 0.2, 0, 5)
## Output the dataframe and the graph it represents
str(timeData)
## 'data.frame': 5050 obs. of 3 variables:
## $ X : num -10 -9.8 -9.6 -9.4 -9.2 -9 -8.8 -8.6 -8.4 -8.2 ...
## $ Y : num -1.5 -1.47 -1.44 -1.41 -1.38 -1.35 -1.32 -1.29 -1.26 -1.23 ...
## $ Time: num 0 0 0 0 0 0 0 0 0 0 ...
ggplot(timeData, aes(X, Y, group=Time)) +
geom_path()
visualizeFunction <- function(dataframe)
{
if (is.null(dataframe$OptimalX))
{
ggplot(dataframe, aes(X, Y, group=Time)) +
geom_path() +
transition_states(Time, transition_length=1, state_length=1, wrap=F) +
labs(title = "Time: {closest_state}") +
enter_fade() +
exit_fade()
}
else
{
ggplot(dataframe, aes(X, Y, group=Time)) +
geom_path() +
geom_point(x=dataframe$OptimalX, y=dataframe$OptimalY, colour="hotpink", size=4) +
transition_states(Time, transition_length=1, state_length=1, wrap=F) +
labs(title = "Time: {closest_state}") +
enter_fade() +
exit_fade()
}
}
visualizeFunction(timeData)
The function updates the OptimalX and OptimalY columns of a dataframe containing X, Y, and Time to include the best results from hill climbing optimization function. It attempts to find the maxima.
ticksPerUnitTime controls how fast the hill climbing runs compared to the function. For every increase in the time value of 1.0, the hill climbing will update its value ticksPerUnitTime times.
costFunction is the function being optimized, of the form y = costFunction(x, time).
At time = 0, the algorithm will use a single random guess. That is, do not expect the first value to be a good one. Setting startX can control this instead of a random value.
windowSize controls how far the hill climb algorithm can guess for each guess. It will look up to (windowSize / 2) left or right from its current best
hillClimb <- function(timeDataFrame, ticksPerUnitTime, costFunction, startX = NULL, windowSize = 1)
{
# Setup variables
currentTime = min(timeDataFrame$Time)
currentTick = 1
currentSubset = timeDataFrame[timeDataFrame$Time == currentTime,]
bestX = startX
if (is.null(bestX))
{
bestX = runif(1, min(currentSubset$X), max(currentSubset$X))
}
bestY = costFunction(bestX, currentTime)
width = windowSize / 2
# Record optimal x and y
timeDataFrame[timeDataFrame$Time == currentTime,]$OptimalX = rep(bestX, nrow(currentSubset))
timeDataFrame[timeDataFrame$Time == currentTime,]$OptimalY = rep(bestY, nrow(currentSubset))
# Iterate through time
nextSubset = timeDataFrame[timeDataFrame$Time > currentTime,]
while (nrow(nextSubset) > 0)
{
# Calculate number of hill climb ticks (guesses) to do
nextTime = min(nextSubset$Time)
iterations = floor(nextTime * ticksPerUnitTime) - currentTick
# Update variables
currentTime = nextTime
currentSubset = timeDataFrame[timeDataFrame$Time == currentTime,]
getOption <- function() {
return (runif(1,
max(bestX - width, min(currentSubset$X)),
min(bestX + width, max(currentSubset$X))))
}
bestY = costFunction(bestX, currentTime)
# Actually do hill climbing
if (iterations >= 1)
for (i in 1:iterations)
{
currentTick = currentTick + 1
testX = getOption()
testY = costFunction(testX, currentTime)
if (testY > bestY)
{
bestX = testX
bestY = testY
}
}
# Record new optimal x and y
timeDataFrame$OptimalX[timeDataFrame$Time == currentTime] = rep(bestX, nrow(currentSubset))
timeDataFrame$OptimalY[timeDataFrame$Time == currentTime] = rep(bestY, nrow(currentSubset))
# Load next time subset
nextSubset = timeDataFrame[timeDataFrame$Time > currentTime,]
}
return (timeDataFrame)
}
# Approximately 1 guess per tick
timeData = hillClimb(timeData, 9, f2D, startX = -10)
## Warning in `[<-.data.frame`(`*tmp*`, timeDataFrame$Time == currentTime, :
## provided 4 variables to replace 3 variables
## Warning in `[<-.data.frame`(`*tmp*`, timeDataFrame$Time == currentTime, :
## provided 4 variables to replace 3 variables
visualizeFunction(timeData)
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).
# Approximately 0.3 guesses per tick
timeData = hillClimb(timeData, 3, f2D, startX = -10)
visualizeFunction(timeData)
# Approximately 3 guesses per tick
timeData = hillClimb(timeData, 28, f2D, startX = -10)
visualizeFunction(timeData)
This next section sets up an agent-based model that exhibits complexity. We will use it to for the following workshop, where you will attempt to optimize over the output from the model.
The model is a square grid of cells, wrapping on both axes, with one agent per cell. Each agent has a value that it can adjust up or down. An agent wants to have a higher value than three of its neighbors, but does not want to be the highest. Agents will:
# From https://stackoverflow.com/questions/2453326/fastest-way-to-find-second-third-highest-lowest-value-in-vector-or-column
maxN <- function(x, N=2){
len <- length(x)
if(N>len){
warning('N greater than length(x). Setting N=length(x)')
N <- length(x)
}
sort(x,partial=len-N+1)[len-N+1]
}
# End sourced code
getAgentChange <- function(agentValue, neighborValues)
{
neighborValues = c(neighborValues, agentValue)
if (agentValue >= max(neighborValues))
return (-3)
if (agentValue < mean(neighborValues))
return (1)
if (agentValue >= mean(neighborValues) && agentValue < maxN(neighborValues, 2))
return (1)
return (0)
}
buildWorld <- function(height, width, startingValues = NULL)
{
if (is.null(startingValues))
{
startingValues = rep(100, height * width)
}
densityValue = 1 / (height * width)
sumStartValues = sum(startingValues)
world <- data.frame(X=0, Y=0, Value=startingValues[1], Density = densityValue)
i = 1
for (x in 1:width)
{
for (y in 1:height)
{
world[i,]$X = x
world[i,]$Y = y
world[i,]$Value = startingValues[i]
world[i,]$Density = startingValues[i] / sumStartValues
i = i + 1
}
}
return (world)
}
world = buildWorld(17, 17, seq(100, 128.9, 0.1))
ggplot(world, aes(X, Y, z = Density)) +
geom_raster(aes(fill = Density)) +
geom_contour(colour = "white")
updateWorld <- function(world, updateFunction, orthogonalOnly = T)
{
newWorld = buildWorld(max(world$X), max(world$Y))
for (x in 1:max(world$X))
{
xLower = x - 1
if (xLower == 0) xLower = max(world$X)
xUpper = x + 1
if (xUpper > max(world$X)) xUpper = 1
for (y in 1:max(world$Y))
{
yLower = y - 1
if (yLower == 0) yLower = max(world$Y)
yUpper = y + 1
if (yUpper > max(world$Y)) yUpper = 1
neighborValues = c()
neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == y,]$Value)
neighborValues = c(neighborValues, world[world$X == xLower & world$Y == y,]$Value)
neighborValues = c(neighborValues, world[world$X == x & world$Y == yUpper,]$Value)
neighborValues = c(neighborValues, world[world$X == x & world$Y == yLower,]$Value)
if (!orthogonalOnly)
{
neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yUpper,]$Value)
neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yLower,]$Value)
neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yUpper,]$Value)
neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yLower,]$Value)
}
currentValue = world[newWorld$X == x & newWorld$Y == y,]$Value
newWorld[newWorld$X == x & newWorld$Y == y,]$Value = currentValue + updateFunction(currentValue, neighborValues)
}
}
newWorld$Density = newWorld$Value / sum(newWorld$Value)
return (newWorld)
}
world2 = updateWorld(world, getAgentChange)
ggplot(world2, aes(X, Y, z = Density)) +
geom_raster(aes(fill = Density)) +
geom_contour(colour = "white")
# Assembles the base data - what elevation is each coordinate at for each time step
buildTimeWorld <- function(width, height, iterations)
{
lastStep = buildWorld(width, height, seq(100, 100+(height * width * 0.1), 0.1))
lastStep$Time = rep(1, nrow(lastStep))
totalData = lastStep
for (t in 2:iterations)
{
newData = updateWorld(lastStep, getAgentChange)
newData$Time = rep(t, nrow(newData))
lastStep = newData
totalData = rbind(totalData, newData)
}
return(totalData)
}
# Given a set of points over time, outputs the optimal point at each time step
getOptimalData <- function(timeWorld)
{
optimalData = data.frame(OptimalX = 0, OptimalY = 0, Time = 0)
i = 1
for (t in min(timeWorld$Time):max(timeWorld$Time))
{
timeSubset = timeWorld[timeWorld$Time == t,]
optimalData[i,]$OptimalX = timeSubset[timeSubset$Value == max(timeSubset$Value),][1,]$X
optimalData[i,]$OptimalY = timeSubset[timeSubset$Value == max(timeSubset$Value),][1,]$Y
optimalData[i,]$Time = t
i = i + 1
}
return (optimalData)
}
# Given a set of points over time and an optimization function, returns the optimization function's
# guess of optimality for each time step
getGuesses <- function(timeWorld, optimizeFunction, ticksPerTime = 1, ...)
{
guessData = data.frame(GuessedX = 0, GuessedY = 0, Time = 0)
xGuess = floor(mean(timeWorld$X))
yGuess = floor(mean(timeWorld$Y))
i = 1
for (t in min(timeWorld$Time):max(timeWorld$Time))
{
timeSubset = timeWorld[timeWorld$Time == t,]
guesses = optimizeFunction(timeSubset, ticksPerTime, xGuess, yGuess, ...)
xGuess = guesses[1]
yGuess = guesses[2]
guessData[i,]$GuessedX = xGuess
guessData[i,]$GuessedY = yGuess
guessData[i,]$Time = t
i = i + 1
}
return (guessData)
}
# Horrible ugly monster function to encapsulate a bunch of work.
# Builds a world (or reuses the one passed in a timeWorld). Uses that to obtain optimal points
# (if includeOptimalPoints is true), as well as run an optimization function (if optimizeFunction is not
# null). Returns an object with three dataframes (or NULLs) - worldData, optimalPoints, and guessedPoints.
assembleData <- function (width = 17, height = 17, iterations = 50, timeWorld = NULL, optimizeFunction = NULL, ticksPerTime = 1, includeOptimalPoints = T, ...)
{
finalData <- list(worldData = NULL, optimalPoints = NULL, guessedPoints = NULL)
if (is.null(timeWorld))
{
finalData$worldData = buildTimeWorld(width, height, iterations)
} else {
finalData$worldData = timeWorld
}
if (!is.null(optimizeFunction))
{
finalData$guessedPoints = getGuesses(finalData$worldData, optimizeFunction, ticksPerTime, ...)
}
if (includeOptimalPoints)
{
finalData$optimalPoints = getOptimalData(finalData$worldData)
}
return (finalData)
}
# Assembling a world only
finalData = assembleData(17, 17, 50)
# Save out the world for reuse
timeWorld = finalData$worldData
# See how much faster this is now?
finalData = assembleData(timeWorld = timeWorld)
visualizeFunction <- function(allData, fromtime = 25, untiltime = 50)
{
fromtime = max(min(fromtime, max(allData$worldData$Time)), min(allData$worldData$Time))
untiltime = max(min(untiltime, max(allData$worldData$Time)), min(allData$worldData$Time))
worldData = allData$worldData[allData$worldData$Time >= fromtime & allData$worldData$Time <= untiltime,]
animation = ggplot(worldData, aes(X, Y, z = Density)) +
geom_raster(aes(fill = Density)) +
# geom_contour(colour = "white", bins = 3) +
transition_states(Time, transition_length = 10, state_length = 0, wrap=F)
labs(title = "Time: {closest_state}") +
enter_fade() +
exit_fade() +
ease_aes("linear")
if (!is.null(allData$optimalPoints))
{
optData = allData$optimalPoints[allData$optimalPoints$Time >= fromtime & allData$optimalPoints$Time <= untiltime,]
lengthenFactor = nrow(worldData) / nrow(optData)
final = optData
for(i in 2:lengthenFactor) final = rbind(final, optData)
optData = final
optData = optData[with(optData, order(Time)), ]
animation = animation + geom_point(x=optData$OptimalX, y=optData$OptimalY, colour="hotpink", size=4)
}
if (!is.null(allData$guessedPoints))
{
guessData = allData$guessedPoints[allData$guessedPoints$Time >= fromtime & allData$guessedPoints$Time <= untiltime,]
lengthenFactor = nrow(worldData) / nrow(guessData)
final = guessData
for(i in 2:lengthenFactor) final = rbind(final, guessData)
guessData = final
guessData = guessData[with(guessData, order(Time)), ]
animation = animation + geom_point(x=guessData$GuessedX, y=guessData$GuessedY, colour="lawngreen", size=4)
}
animation
}
visualizeFunction(finalData)
All functions have a standardized signature:
evaluate <- function (allData, fromtime = 25, untiltime = 50)
{
if (is.null(allData$optimalPoints) || is.null(allData$guessedPoints))
{
stop("Incomplete data for evaluate; allData is missing either optimalPoints or guessedPoints")
}
expected = allData$optimalPoints
actual = allData$guessedPoints
fromtime = max(min(fromtime, max(expected$Time)), min(expected$Time))
untiltime = max(min(untiltime, max(expected$Time)), min(expected$Time))
averageDistance = 0
for (t in fromtime:untiltime)
{
expectedCoords = expected[expected$Time == t,]
actualCoords = actual[actual$Time == t,]
distance = sqrt((expectedCoords$OptimalX - actualCoords$GuessedX) ^ 2 + (expectedCoords$OptimalY - actualCoords$GuessedY) ^ 2)
averageDistance = averageDistance + distance
}
averageDistance = averageDistance / (untiltime - fromtime + 1)
return (averageDistance)
}
randomGuesses <- function(currentData, ticks, startX = 1, startY = 1)
{
bestX = round(runif(1, min = min(currentData$X), max = max(currentData$X)))
bestY = round(runif(1, min = min(currentData$Y), max = max(currentData$Y)))
return (c(bestX, bestY))
}
hillClimb <- function(currentData, ticks, startX = 1, startY = 1)
{
bestX = startX
bestY = startY
bestValue = currentData[currentData$X == bestX & currentData$Y == bestY,]$Value
for (i in 1:ticks)
{
xLower = bestX - 1
if (xLower == 0) xLower = max(currentData$X)
xUpper = bestX + 1
if (xUpper > max(currentData$X)) xUpper = 1
yLower = bestY - 1
if (yLower == 0) yLower = max(currentData$Y)
yUpper = bestY + 1
if (yUpper > max(currentData$Y)) yUpper = 1
if (currentData[currentData$X == xLower & currentData$Y == bestY,]$Value > bestValue)
{
bestX = xLower
bestValue = currentData[currentData$X == xLower & currentData$Y == bestY,]$Value
}
if (currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value > bestValue)
{
bestX = xUpper
bestValue = currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value
}
if (currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value > bestValue)
{
bestY = yUpper
bestValue = currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value
}
if (currentData[currentData$X == bestX & currentData$Y == yLower,]$Value > bestValue)
{
bestY = yLower
bestValue = currentData[currentData$X == bestX & currentData$Y == yLower,]$Value
}
}
return (c(bestX, bestY))
}
Accepts two additional parameters:
linearCoolingFunction <- function (x) { return(1 - 0.1 * x) }
powerCoolingFunction <- function (x) { return(0.75 ^ x) }
decayCoolingFunction <- function (x) { return(1 / (x + 1)) }
simulatedAnnealing <- function(currentData, ticks, startX = 1, startY = 1, coolingFunction = NULL, temperatureTickAmount = NULL)
{
if (is.null(coolingFunction))
{
stop("Simulated Annealing requires CoolingFunction!")
}
if (is.null(temperatureTickAmount))
{
stop("Simulated Annealing requires TemperatureTickAmount!")
}
bestX = startX
bestY = startY
bestValue = currentData[currentData$X == bestX & currentData$Y == bestY,]$Value
currentCoolingValue = 0
for (i in 1:ticks)
{
xLower = bestX - 1
if (xLower == 0) xLower = max(currentData$X)
xUpper = bestX + 1
if (xUpper > max(currentData$X)) xUpper = 1
yLower = bestY - 1
if (yLower == 0) yLower = max(currentData$Y)
yUpper = bestY + 1
if (yUpper > max(currentData$Y)) yUpper = 1
nextX = bestX
nextY = bestY
nextValue = 0
if (currentData[currentData$X == xLower & currentData$Y == bestY,]$Value > nextValue)
{
nextX = xLower
nextValue = currentData[currentData$X == xLower & currentData$Y == bestY,]$Value
}
if (currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value > nextValue)
{
nextX = xUpper
nextValue = currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value
}
if (currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value > nextValue)
{
nextY = yUpper
nextValue = currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value
}
if (currentData[currentData$X == bestX & currentData$Y == yLower,]$Value > nextValue)
{
nextY = yLower
nextValue = currentData[currentData$X == bestX & currentData$Y == yLower,]$Value
}
# The part that makes this simulated annealing, not hill climbing
currentCoolingValue = currentCoolingValue + temperatureTickAmount
if ((nextValue < bestValue && runif(1) < coolingFunction(currentCoolingValue)) ||
nextValue > bestValue)
{
bestValue = nextValue
bestX = nextX
bestY = nextY
}
}
return (c(bestX, bestY))
}
randomOutput = assembleData(timeWorld = timeWorld, optimizeFunction = randomGuesses, ticksPerTime = 1)
cat("Random guesses were, on average, off by", evaluate(randomOutput), "units.\n")
## Random guesses were, on average, off by 8.588169 units.
visualizeFunction(randomOutput)
hillClimbOutput = assembleData(timeWorld = timeWorld, optimizeFunction = hillClimb, ticksPerTime = 10)
cat("Hill climbing was, on average, off by", evaluate(hillClimbOutput), "units.\n")
## Hill climbing was, on average, off by 6.260702 units.
visualizeFunction(hillClimbOutput)
saOutput = assembleData(timeWorld = timeWorld, optimizeFunction = simulatedAnnealing, ticksPerTime = 100, coolingFunction = decayCoolingFunction, temperatureTickAmount = 0.05)
cat("Simulated Annealing was, on average, off by", evaluate(saOutput), "units.\n")
## Simulated Annealing was, on average, off by 6.019548 units.
visualizeFunction(saOutput)